#-----------------------------------------------------------------------------------------------------------------
#-----------------------------Figure 2. Sample Distribution by Respondents' Home Provinces------------------------
#-----------------------------------------------------------------------------------------------------------------

#R version 4.0.3 (2020-10-10)

## Required packages (May need to restart R program after installation)
install.packages("rgdal")
install.packages("sf")
install.packages("Rcpp")
install.packages("readstata13")
install.packages("plyr")
install.packages("data.table")
install.packages("ggplot2")
install.packages("grid")
install.packages("ggrepel")
install.packages("ggsn")
install.packages("Rcpp")

## Clear R workspaces
rm(list = ls())
loadedNamespaces()

## Load libraries
library(readstata13) # for loading data
library(plyr) # for data processing
library(data.table) # for data processing
library(ggrepel) # for plotting map
#library(ggsn) # for scale bar `scalebar`
library(Rcpp) 

## Set working directory
#Please set the working directory to the folder where the readme.txt file is located
setwd("xxx/xxx/xxx")


## load data
d<-read.dta13("data/collegesurvey.dta")
d[d=="TAIWAN" | d=="CAMBODIA" | d=="RUSSIA"] <- "ABROAD" # Replacing born abroad


## draw circles on provincial capitals
d$forcount <- 1
jingwei<-read.csv("maps/prov_capital.csv",head=T)[,c("provgb","longitude","latitude")]
jingwei<-rbind(jingwei,c(99,127,22)) # abroad

table(d$born_prov)
counts<-c(by(d,d$born_prov,function(x) sum(x$forcount)))
counts<-counts[-1]

prov<-c("Abroad","Anhui", "Beijing", "Chongqing", "Fujian", "Gansu", "Guangdong", "Guangxi", "Guizhou", "Hainan", "Hebei", "Heilongjiang", "Henan", "Hubei", "Hunan", "Jiangsu", "Jiangxi", "Jilin", "Liaoning", "Inner Mongolia", "Ningxia", "Qinghai", "Shandong", "Shaanxi", "Shanxi", "Sichuan", "Tianjin", "Xinjiang", "Tibet", "Yunnan", "Zhejiang")
count.prov<-cbind.data.frame(c(99, 34, 11, 50, 35, 62, 44, 45, 52, 46, 13, 23, 41, 42, 43, 32, 36, 22, 21, 15, 64, 63, 37, 61, 14, 51, 12, 65, 54, 53, 33),names(counts),prov,
                             as.numeric(round(counts)))
colnames(count.prov)<-c("provgb","provcap","prov","freq")
circle<-join(count.prov, jingwei, by='provgb', type='left', match='all')

local_fir_dir <- "/maps" # local the .shp file is stored
china_map <- rgdal::readOGR("maps/bou2_4p.shp")
china_province = setDT(china_map@data)
setnames(china_province, "NAME", "province")

china_province[, province:=iconv(province, from = "GBK", to = "UTF-8")] 

china_province[, id:= .I-1] 
china_province[, table(province)]

china_province[, province:= as.factor(province)]

dt_china = setDT(fortify(china_map))
dt_china[, id:= as.numeric(id)]

setkey(china_province, id); setkey(dt_china, id)
dt_china <- china_province[dt_china]

dt_china$gb<-dt_china$ADCODE93/10000

input_data <- data.table(circle)

setkey(input_data, provgb)
setkey(dt_china, gb)

china_map_pop <- input_data[dt_china[AREA>0.1,]]


## Draw frequency map
pdf("outputmain/map_freq.pdf", 8.5, 6) 
ggplot(china_map_pop) +
  geom_polygon(aes(x = long, y = lat, group = group), fill="grey", alpha=0.3)+
  geom_path(aes(x = long, y = lat, group = group), size = 0.2) +
  geom_point(data=input_data, aes(x=longitude, y=latitude, size=freq, fill = freq, colour=freq, alpha=freq),  stroke = 1) +
  scale_size_continuous(range=c(2,25)) + scale_alpha(range=c(0.4,0.8)) +
  theme_void() +  theme(legend.position="none") +  annotate("text", x = 127, y=23.5, label = "Abroad", size=3.5)
dev.off()

